rm(list=ls())# Record timing# Start timert_start <-proc.time()# Set optionsknitr::opts_chunk$set(echo =TRUE,warning =FALSE,message =FALSE,fig.align ='center',fig.retina =2)library(tinytex)library(ggplot2)library(gt)library(survival)library(data.table)library(randomForest)library(grf)library(policytree)library(DiagrammeR)# library(devtools)# install_github("larry-leon/forestsearch", force = TRUE)library(forestsearch)library(weightedsurv)# Set theme for plotstheme_set(theme_minimal(base_size =12))
1 Summary
Reproducing main GBSG analysis
1.1 Datasetup
Code
df.analysis <- gbsgdf.analysis <-within(df.analysis,{id <-as.numeric(c(1:nrow(df.analysis))) # time to monthstime_months <- rfstime/30.4375grade3 <-ifelse(grade=="3",1,0)treat <- hormon})confounders.name <-c("age","meno","size","grade3","nodes","pgr","er")outcome.name <-c("time_months")event.name <-c("status")id.name <-c("id")treat.name <-c("hormon")
# NOTE: In general for GRF trees# leaf1 --> recommend control# leaf2 --> recommend treatment# Tree depth 1plot(grf_est1$tree1,leaf.labels=c("Control","Treat"))
Code
# Tree depth 2plot(grf_est1$tree2,leaf.labels=c("Control","Treat"))
1.4 Forestsearch with depth=2 (maxk = 2)
Code
# Setup parallel processinglibrary(doFuture)
Warning: package 'doFuture' was built under R version 4.5.2
Warning: package 'future' was built under R version 4.5.2
# patchhwork needed for a combined bootstrap plot (otherwise if not available or very few bootstraps do not produce)library(patchwork)# Number of bootstrap samplesNB <-3system.time({fs_bc <-forestsearch_bootstrap_dofuture(fs.est = fs, nb_boots = NB, show_three =FALSE, details =TRUE)})
=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 3
Event threshold: <12 events
ORIGINAL Subgroup H on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
ORIGINAL Subgroup Hc on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
NEW Subgroups found: 3 (100.0%)
NEW Subgroup H* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 0 (0.0% of successful)
Either arm <12 events: 0 (0.0% of successful)
NEW Subgroup Hc* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 0 (0.0% of successful)
Either arm <12 events: 0 (0.0% of successful)
Code
summaries$diagnostics_table_gt
Bootstrap Diagnostics Summary
Analysis of 3 bootstrap iterations
Category
Metric
Value
Success Rate
Total iterations
3
Successful
3 (100.0%)
Failed
0 (0.0%)
Success rating
Excellent
Subgroup H (Questionable)
Observed HR
2.537
Bias-corrected HR
2.346
Bootstrap CV (%)
61.7%
N estimates
3
Subgroup Hc (Recommend)
Observed HR
0.608
Bias-corrected HR
0.553
Bootstrap CV (%)
31.1%
N estimates
3
Code
summaries$subgroup_summary$original_agreement
Metric Value
<char> <char>
1: Total bootstrap iterations 3
2: Successful iterations 3
3: Failed iterations (no subgroup) 0
4:
5: Original subgroup definition {er <= 0} & {size <= 35}
6: Exact match with original 0 (0.0%)
7: Different from original 3 (100.0%)
8: Partial match (shared factor) 1 (33.3%)
# 1 iteration of 2-fold cross-validation (in practice use 10-fold and at least 50 (Ksims) iterations)Ksims <-1system.time({fs_ten <-forestsearch_tenfold(fs.est = fs, sims = Ksims, Kfolds =2, details =FALSE, parallel_args =list(plan ="multisession", workers =13, show_message =TRUE))})plan(sequential)print(fs_ten$find_summary)print(fs_ten$sens_summary)print(head(fs_ten$sens_out))print(head(fs_ten$find_out))
1.7 Forest Search OOB
Code
# OOB = out-of-bag prediction# Kfolds = n (default to n-fold cross-validations)# Kfold = 2 for illustrationfs_OOB <-forestsearch_Kfold(fs.est = fs, details =TRUE, Kfolds =2,parallel_args =list(plan ="callr", workers =13, show_message =TRUE))# Reset workers to singleplan(sequential)summary_OOB <-forestsearch_KfoldOut(res=fs_OOB, details=TRUE, outall=TRUE)table(summary_OOB$SGs_found[,1])table(summary_OOB$SGs_found[,2])
Code
# Define subgroups to display (these could be pre-specified or of post-hoc interest)subgroups <-list(age_gt65 =list(subset_expr ="age > 65",name ="age > 65",type ="reference" ),age_lt65 =list(subset_expr ="age <= 65",name ="age <= 65",type ="reference" ),pgr_positive =list(subset_expr ="pgr > 0",name ="pgr > 0",type ="reference" ),pgr_negative =list(subset_expr ="pgr <= 0",name ="pgr <= 0",type ="reference" ) )# If fs_OOB and fs_kfold are provided then CV metrics are displayed for both# Create the forest plot result <-plot_subgroup_results_forestplot(fs_results =list(fs.est = fs, fs_bc = fs_bc, fs_OOB =NULL, fs_kfold =NULL),df_analysis = df.analysis,subgroup_list = subgroups,outcome.name ="time_months",event.name ="status",treat.name ="hormon",E.name ="Hormon",C.name ="CT",ci_column_spaces =25 )# Display the plotplot(result$plot)